Variable trees are a new way to explore discrete multivariate data. They display nested subsets and corresponding frequencies and percentages. Manual calculation of these quantities is laborious and error-prone, especially when there are many multi-level factors and missing data. In this tutorial, you’ll learn about the {vtree} package and how to use it to quickly generate variable trees and gain new insights into your data. Using COVID-19 examples, you’ll also learn how to prune variable trees, display summary information, label variables and nodes, explore missing values, and more.
# Read the SPSS survey file from the working directory.
data <- read_spss(file = "Viral_Communication_Phase_I-III.sav")
# Alternative source: read the same file straight from the Zenodo record.
# data <- read_spss(file = "https://zenodo.org/record/5779516/files/Viral_Communication_Phase_I-III.sav")

# Working copy used by the rest of the script; as_factor() presumably turns
# the SPSS value-labelled columns into factors (haven semantics -- confirm).
d <- as_factor(x = data)
# View(Hmisc::label(d))

#' Collapse an agreement scale into two categories
#'
#' "Strongly Agree", "Agree" and "Somewhat Agree" map to `agree`;
#' "Somewhat Disagree", "Disagree" and "Strongly Disagree" map to `disagree`.
#' "Neutral" maps to `agree` when `neutralAgree` is TRUE and to `disagree`
#' otherwise. Values matching none of these labels (including NA) fall through
#' every `case_when()` branch and come back as NA.
#'
#' @param x Vector of responses, compared against the labels listed above.
#' @param neutralAgree Should "Neutral" be counted on the `agree` side?
#' @param agree Value returned for agreeing responses.
#' @param disagree Value returned for disagreeing responses.
#' @return A vector with one element per element of `x`, holding `agree`,
#'   `disagree` or NA.
agreement <- function(x, neutralAgree = TRUE, agree = "Agree",
                      disagree = "Disagree") {
  neutral <- if (neutralAgree) agree else disagree
  # `x` is used directly: by the time it arrives here it is an ordinary
  # vector (the callers pass a column from inside mutate()), so the
  # embracing operator `{{ }}` has nothing to do in this function.
  case_when(
    x == "Strongly Agree" ~ agree,
    x == "Agree" ~ agree,
    x == "Somewhat Agree" ~ agree,
    x == "Neutral" ~ neutral,
    x == "Somewhat Disagree" ~ disagree,
    x == "Disagree" ~ disagree,
    x == "Strongly Disagree" ~ disagree
  )
}

# Derive the simplified variables that the variable trees below display.
d <- d %>%
  mutate(
    # Short aliases for the phase-completion columns.
    # NOTE(review): "COMPETION" is spelled differently from the two
    # "COMPLETION" columns; kept as written -- confirm it matches the data.
    Phase_1 = M_PHASE1_COMPETION,
    Phase_2 = M_PHASE2_COMPLETION,
    Phase_3 = M_PHASE3_COMPLETION,
    # Political orientation collapsed to three ordered levels.
    politics_1 = factor(
      case_when(
        SD_POL_ORIENTATION == "Left" ~ "Left",
        SD_POL_ORIENTATION == "-2" ~ "Left",
        SD_POL_ORIENTATION == "-1" ~ "Left",
        SD_POL_ORIENTATION == "Centre" ~ "Centre",
        SD_POL_ORIENTATION == "1" ~ "Right",
        SD_POL_ORIENTATION == "2" ~ "Right",
        SD_POL_ORIENTATION == "Right" ~ "Right"
      ),
      levels = c("Left", "Centre", "Right")
    ),
    # Mask effectiveness collapsed to "Effective" / "Less effective".
    # NOTE(review): the last label reads "Not effective at all effective";
    # kept verbatim -- confirm it matches the level name in the data.
    mask_1 = case_when(
      PHASE1_AC_EFF_MASK == "Extremely effective" ~ "Effective",
      PHASE1_AC_EFF_MASK == "Very effective" ~ "Effective",
      PHASE1_AC_EFF_MASK == "Effective" ~ "Effective",
      PHASE1_AC_EFF_MASK == "Somewhat effective" ~ "Less effective",
      PHASE1_AC_EFF_MASK == "Not effective at all effective" ~ "Less effective"
    ),
    world_1 = agreement(PHASE1_AS_WORLD, neutralAgree = TRUE),
    # Boring/exciting scale: the midpoint "0" is counted as "Not Exciting".
    science_eb_1 = case_when(
      PHASE1_AS_BOR_EXC == "Exciting" ~ "Exciting",
      PHASE1_AS_BOR_EXC == "2" ~ "Exciting",
      PHASE1_AS_BOR_EXC == "1" ~ "Exciting",
      PHASE1_AS_BOR_EXC == "0" ~ "Not Exciting",
      PHASE1_AS_BOR_EXC == "-1" ~ "Not Exciting",
      PHASE1_AS_BOR_EXC == "-2" ~ "Not Exciting",
      PHASE1_AS_BOR_EXC == "Boring" ~ "Not Exciting"
    ),
    harmless_1 = agreement(
      PHASE1_RA_HARMLESS_r,
      neutralAgree = TRUE,
      agree = "Harmless",
      disagree = "Not Harmless"
    ),
    # For these two items "Neutral" is counted as disagreement.
    finance_1 = agreement(PHASE1_RA_FINANCE, neutralAgree = FALSE),
    economy_1 = agreement(PHASE1_RA_ECONOMY, neutralAgree = FALSE)
  )

# Variable tree of Phase 1 completion, then gender.
vtree(
  d, "Phase_1 SD_GENDER",
  labelvar = c(Phase_1 = "Completed Phase 1", SD_GENDER = "Gender"),
  splitwidth = Inf
)

# NOTE(review): `u` is not defined anywhere in the visible code; it is
# presumably a subset of `d` created elsewhere -- confirm before running.
vtree(
  u, "science_eb_1 harmless_1 mask_1",
  labelvar = c(science_eb_1 = "Science is", harmless_1 = "Covid is"),
  pattern = TRUE
)

# Tree by political orientation, with summary text for two of the derived
# variables.
vtree(
  u, "politics_1",
  splitwidth = Inf,
  labelvar = c(politics_1 = "Political orientation"),
  summary = c(
    "harmless_1=Harmless \nCovid is harmless: %pct%",
    "mask_1=Effective \nMasks are effective: %pct%"
  )
)